ScaDaMaLe Course site and book

Visualization of the Segmentation by municipalities using Python.

Virginia Jimenez Mohedano (LinkedIn) and Raazesh Sainudiin (LinkedIn).

This project was supported by UAB SENSMETRY through a Data Science Thesis Internship 
between 2022-01-17 and 2022-06-05 to Virginia J.M., and by the 
Databricks University Alliance, with infrastructure credits from AWS, to 
Raazesh Sainudiin, Department of Mathematics, Uppsala University, Sweden.

2022, Uppsala, Sweden

# Reading the accident frequencies previously obtained for each municipality

import pyspark
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StringType, DoubleType

# Explicit schema for the CSV: a municipality name and its accident frequency,
# both nullable.
schema = (
    StructType()
    .add("municipality", StringType(), True)
    .add("frequency", DoubleType(), True)
)

# The CSV has a header row; the schema above is applied to its columns.
municipality_freq = (
    spark.read.format("csv")
    .option("header", True)
    .schema(schema)
    .load("dbfs:/datasets/lithuania/municipalities_freq.csv")
)

# Print up to 1000 rows.
municipality_freq.show(1000)
+--------------------+--------------------+
|        municipality|           frequency|
+--------------------+--------------------+
|Kaišiadorių rajon...|0.010304096506659964|
|Kelmės rajono sav...|0.010304096506659964|
|Pakruojo rajono s...|0.003853564547206...|
|Skuodo rajono sav...|0.003853564547206...|
|Elektrėnų savival...|0.004356203401189578|
|Kazlų Rūdos saviv...|0.004356203401189578|
|Neringos savivaldybė|0.001507916561950...|
|Birštono savivaldybė|0.001507916561950...|
|Šalčininkų rajono...|0.009047499371701432|
|Švenčionių rajono...|0.005277707966825836|
|Radviliškio rajon...|0.011979559353271342|
|Vilkaviškio rajon...|0.008628633660048589|
|Širvintų rajono s...|0.003518471977883...|
|Klaipėdos miesto ...|0.061405713328306945|
|Panevėžio miesto ...| 0.04959370025969674|
|Panevėžio rajono ...| 0.02345647985255927|
|Kėdainių rajono s...|0.013990114769204993|
|Mažeikių rajono s...|0.013236156488229874|
|Anykščių rajono s...| 0.00636675881712323|
|Šiaulių miesto sa...|0.039205830610706205|
|Klaipėdos rajono ...| 0.02421043813353439|
|Šilutės rajono sa...|0.016419535896791487|
|Raseinių rajono s...|0.014660299907849544|
|Tauragės rajono s...|0.013822568484543855|
|Rokiškio rajono s...|0.008293541090726313|
|Ukmergės rajono s...|0.008042221663734606|
|Šilalės rajono sa...|0.006199212532462093|
|Molėtų rajono sav...|0.005612800536148...|
|Joniškio rajono s...|0.005445254251486974|
|Kupiškio rajono s...|0.005193934824495267|
|Akmenės rajono sa...|0.004272430258859...|
|Ignalinos rajono ...|0.003602245120214459|
|Šiaulių rajono sa...| 0.01792745245874173|
|Plungės rajono sa...|0.015498031331155232|
|Telšių rajono sav...|0.014241434196196699|
|Kretingos rajono ...| 0.01130937421462679|
|Pasvalio rajono s...|0.010806735360643378|
|Palangos miesto s...|0.010387869648990534|
|Varėnos rajono sa...|0.007874675379073468|
|Lazdijų rajono sa...|0.004775069112842423|
|Jurbarko rajono s...|0.003937337689536734|
|Zarasų rajono sav...|0.002680740554578...|
|Vilniaus miesto s...| 0.19736952333082014|
|Vilniaus rajono s...| 0.03602245120214459|
|Jonavos rajono sa...|0.014073887911535563|
|Prienų rajono sav...|0.009382591941023708|
|Šakių rajono savi...|0.008963726229370864|
|Biržų rajono savi...|0.006701851386445506|
|Alytaus miesto sa...| 0.01348747591522158|
|Trakų rajono savi...|0.013403702772891012|
|Alytaus rajono sa...| 0.01206333249560191|
|Utenos rajono sav...|0.010555415933651672|
|Druskininkų saviv...|0.002596967412247...|
|Marijampolės savi...| 0.01357124905755215|
|Kauno miesto savi...| 0.12239256094496105|
|Kauno rajono savi...| 0.02906928038870738|
|Kalvarijos saviva...|0.002261874842925358|
| Pagėgių savivaldybė|0.001424143419619...|
|Visagino savivaldybė|0.002513194269917...|
| Rietavo savivaldybė|0.003183379408561615|
+--------------------+--------------------+
# Calculating colors

# https://matplotlib.org/stable/tutorials/colors/colormaps.html
from matplotlib.cm import viridis
from matplotlib.colors import to_hex

from pyspark.sql import functions as F

# Compute both extremes of the "frequency" column in a single aggregation,
# so the data is scanned by one Spark job instead of two.
min_freq, max_freq = municipality_freq.agg(
    F.min("frequency"), F.max("frequency")
).first()

# Width of the observed frequency interval, used to normalize into [0, 1].
freq_range = max_freq - min_freq

def calculate_color(row):
    """
    Convert a municipality's frequency to a CSS hex color.

    The frequency is min-max normalized with the module-level ``min_freq``
    and ``freq_range`` and mapped through the inverted viridis colormap, so
    that higher frequencies get darker colors.

    Args:
        row: a row exposing the "municipality" and "frequency" fields.

    Returns:
        A ``(municipality, color)`` tuple, where ``color`` is a hex color
        string without an alpha component.
    """
    freq = row["frequency"]

    # make freq a number between 0 and 1; if every frequency is identical the
    # range is zero, so use 0 rather than dividing by zero
    normalized_freq = (freq - min_freq) / freq_range if freq_range else 0.0

    # This is because in viridis colormap, darker is lower values and we want the opposite
    inverse_freq = 1 - normalized_freq

    # transform the freq coefficient to a matplotlib color
    mpl_color = viridis(inverse_freq)

    # transform from a matplotlib color to a valid CSS color
    gmaps_color = to_hex(mpl_color, keep_alpha=False)

    return (row["municipality"], gmaps_color)

# Build a {municipality: hex color} lookup from the per-row colors
colors = municipality_freq.rdd.map(calculate_color).collectAsMap()
// Temporary copy of the GeoJSON from DBFS to the driver's local file system
// (file:/databricks/driver/) so that Python can read it with ordinary file I/O.
dbutils.fs.cp("dbfs:/datasets/magellan/municipalities.geojson", "file:/databricks/driver/")
res0: Boolean = true
# Reading and processing geojson (map and borders)

import json
import gmaps
import gmaps.datasets
import gmaps.geojson_geometries
from ipywidgets.embed import embed_minimal_html

import os

# Prefer a key supplied through the GOOGLE_MAPS_API_KEY environment variable.
# NOTE(review): the fallback literal is a credential committed to source; it is
# kept only so the notebook keeps running unchanged. It should be rotated and
# removed once the key is provided through the environment or a secret store.
gmaps.configure(api_key=os.environ.get("GOOGLE_MAPS_API_KEY", "AIzaSyDEHHgMMS33M5AT8lav2Q-sem5KOyFx9Sc")) # Your Google API key

# municipalities / Savivaldybės
# Read the GeoJSON through a context manager so the file handle is closed, and
# decode it explicitly as UTF-8 (the encoding GeoJSON is specified to use)
# instead of relying on the platform default.
with open('municipalities.geojson', 'r', encoding='utf-8') as geojson_file:
    municipalities = json.load(geojson_file)

# Removing municipality capitals: keep, in place, only the features whose
# geometry type is exactly "Polygon". Every other geometry type is dropped
# (presumably the capitals are stored as point features -- verify against the
# GeoJSON).
municipalities['features'][:] = [
    feature
    for feature in municipalities['features']
    if feature["geometry"]["type"] == "Polygon"
]
    
# Order the colors by the geojson order: one color per remaining feature,
# looked up by the feature's "name" property.
ordered_colors = [
    colors[feature['properties']['name']]
    for feature in municipalities['features']
]
from pylab import *

# Generating map

# Styling shared by every municipality polygon; only the fill color varies.
layer_style = {
    "fill_opacity": 0.8,
    "stroke_color": 'black',
    "stroke_opacity": 1.0,
    "stroke_weight": 0.2,
}

fig = gmaps.figure()
freq_layer = gmaps.geojson_layer(
    municipalities,
    fill_color=ordered_colors,
    **layer_style
)
fig.add_layer(freq_layer)

# Write the figure to an HTML file.
embed_minimal_html("export.html", views=[fig])
# Adding color legend to map
# viridis resampled to 20 discrete colors
cmap = cm.get_cmap('viridis', 20)

# Comma-separated hex colors for the CSS gradient, walking the colormap from
# its last color to its first (rgb2hex accepts rgb or rgba).
gradient = ",".join(
    matplotlib.colors.rgb2hex(cmap(i)) for i in reversed(range(cmap.N))
)

# Read the exported map through a context manager so the file handle is closed.
with open("export.html", 'r') as export_file:
    exported_html = export_file.read()

# Inject the legend: its CSS goes just before </head>, and the legend markup
# (title, min/max labels and the gradient bar) just before </body>.
html_file_content = exported_html\
                    .replace("</head>", """<style>
                                 .legend {
                                   max-width: 430px;
                                 }
                                  .legend div{
                                   background: linear-gradient(to right, """ + gradient + """);
                                   border-radius: 4px;
                                   padding: 10px;
                                 }

                                .legend p {
                                  text-align: justify;
                                  text-justify: inter-word;
                                  margin: 0px;
                                      margin-block-start: 0em;
                                    margin-block-end: 0em;
                                    height: 1em;
                                }
                                .legend p:after {
                                  content: "";
                                  display: inline-block;
                                  width: 100%;
                                }
                              </style>
                            </head>""")\
                    .replace("</body>","""
                          <h2>Relative frequency of accidents</h2>
                          <div class="legend">
                            <p>""" + str(round(min_freq,2)) + " " + str(round(max_freq,2)) +"""</p>
                            <div></div>
                          </div>
                        </body>""")
# !!!!!!!!!!!!!!!!!!!!!
# WARNING (from the original author): this cell can only be run once per
# cluster restart.
# Render the exported map, with the injected legend, in the notebook.
displayHTML(html_file_content)
IPyWidget export